Copy Number Pipeline

In [9]:
from __future__ import print_function
import os.path
import pandas as pd
from IPython.display import Image,display
import dalmatian as dm
from IPython.core.display import HTML 
from CCLE_postp_function import *
import sys
sys.path.insert(0, '../JKBio/')
import TerraFunction as terra
%load_ext autoreload
%autoreload 2
%load_ext rpy2.ipython
from taigapy import TaigaClient
tc = TaigaClient()
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython
In [2]:
namespace1="broad-genomics-delivery"
workspace1="Getz_IBM_CellLines_Exomes"
namespace2="broad-firecloud-ccle"
workspace2="CCLE_DepMap_WES"
refnamespace="broad-firecloud-ccle"
refworkspace="DepMap_WES_CN_HG38"
source1="ibm"
source2="ccle"
sample_set_id = "19Q3"
release = sample_set_id
In [3]:
wm1 = dm.WorkspaceManager(namespace1, workspace1)
wm2 = dm.WorkspaceManager(namespace2, workspace2)
refwm = dm.WorkspaceManager(refnamespace, refworkspace)
In [57]:
newsample = createDatasetWithNewCellLines(wm1,refwm,source1,sample_set_id,wm2,source2)
> /Users/jeremie/Documents/Projects/BroadInstitute/ccle_processing/CCLE_postp_function.py(32)createDatasetWithNewCellLines()
-> refsamples = wto.get_samples()
(Pdb) c
/Users/jeremie/Documents/Projects/BroadInstitute/ccle_processing/CCLE_postp_function.py:32: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  refsamples = wto.get_samples()
/Users/jeremie/Documents/Projects/BroadInstitute/ccle_processing/CCLE_postp_function.py:32: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

  refsamples = wto.get_samples()
uploading new samples
Successfully imported 1581 participants.
Updating many hound records. Switching to batch updates
Hound executing batch upload of 2 records
Successfully imported 1806 samples.
Updating many hound records. Switching to batch updates
Hound executing batch upload of 270902 records
creating a sample set
Successfully imported 1 sample sets:
  * 19Q3interim (7 samples)

ReaLign in HG38

In [62]:
bamtoubam= "BamToUnmappedRGBams_MC"
ubamtofilelist = "Generate_uBAM_File_List"
realign="Realign_WES_GATK4"
In [63]:
bamtoubam = refwm.get_config(bamtoubam)
bamtoubam
Out[63]:
{'deleted': False,
 'inputs': {'BamToUnmappedRGBamsWf.input_bam': 'this.WES_bam',
  'BamToUnmappedRGBamsWf.preemptible_tries': '3',
  'BamToUnmappedRGBamsWf.ValidateSamFile.java_opt': '"-Xmx3000m"',
  'BamToUnmappedRGBamsWf.picard_docker': '"broadinstitute/genomes-in-the-cloud:2.3.1-1504795437"',
  'BamToUnmappedRGBamsWf.ref_fasta_index': 'workspace.ref_fasta_fai',
  'BamToUnmappedRGBamsWf.RevertBamToUnmappedRGBams.disk_size': '400',
  'BamToUnmappedRGBamsWf.SortBamByQueryname.mem_size': '"3500 MB"',
  'BamToUnmappedRGBamsWf.RevertBamToUnmappedRGBams.mem_size': '"3000 MB"',
  'BamToUnmappedRGBamsWf.RevertBamToUnmappedRGBams.output_dir': '"."',
  'BamToUnmappedRGBamsWf.SortBamByQueryname.disk_size': '400',
  'BamToUnmappedRGBamsWf.RevertBamToUnmappedRGBams.java_opt': '"-Xmx1200m"',
  'BamToUnmappedRGBamsWf.picard_path': '"/usr/gitc/"',
  'BamToUnmappedRGBamsWf.SortBamByQueryname.java_opt': '"-Xmx3000m"',
  'BamToUnmappedRGBamsWf.ref_fasta': 'workspace.ref_fasta',
  'BamToUnmappedRGBamsWf.ValidateSamFile.mem_size': '"3500 MB"',
  'BamToUnmappedRGBamsWf.ValidateSamFile.disk_size': '400'},
 'methodConfigVersion': 4,
 'methodRepoMethod': {'methodName': 'BamToUnmappedRGBams',
  'methodVersion': 3,
  'methodNamespace': 'vdauwera',
  'methodUri': 'agora://vdauwera/BamToUnmappedRGBams/3',
  'sourceRepo': 'agora'},
 'name': 'BamToUnmappedRGBams_MC',
 'namespace': 'vdauwera',
 'outputs': {'BamToUnmappedRGBamsWf.sortsam_out': 'this.readgroup_ubams',
  'BamToUnmappedRGBamsWf.validatesam_out': 'this.ubam_validation_reports'},
 'prerequisites': {},
 'rootEntityType': 'sample'}
In [64]:
refwm.update_config(bamtoubam)
subid = refwm.create_submission(bamtoubam['name'],sample_set_id,"sample_set","this.samples")
Successfully updated configuration vdauwera/BamToUnmappedRGBams_MC
Successfully created submission 8499ce99-8837-4fb5-97c3-bb75f3da9db8.
In [73]:
terra.waitForSubmission(refwm, subid)
1.0 of jobs Succeeded in submission 0.
Out[73]:
[]
In [74]:
ubamtofilelist = refwm.get_config(ubamtofilelist)
ubamtofilelist
Out[74]:
{'deleted': False,
 'inputs': {'ArrayToTxt_workflow.CreateTxt.array_of_files': 'this.readgroup_ubams',
  'ArrayToTxt_workflow.CreateTxt.list_name': 'this.name'},
 'methodConfigVersion': 2,
 'methodRepoMethod': {'methodName': 'ArrayOfFilesToTxt',
  'methodVersion': 1,
  'methodNamespace': 'gkugener',
  'methodUri': 'agora://gkugener/ArrayOfFilesToTxt/1',
  'sourceRepo': 'agora'},
 'name': 'Generate_uBAM_File_List',
 'namespace': 'gkugener',
 'outputs': {'ArrayToTxt_workflow.CreateTxt.file_list_name': 'this.unmapped_bams_list'},
 'prerequisites': {},
 'rootEntityType': 'sample'}
In [75]:
refwm.update_config(ubamtofilelist)
subid = refwm.create_submission(ubamtofilelist['name'],sample_set_id,"sample_set","this.samples")
Successfully updated configuration gkugener/Generate_uBAM_File_List
Successfully created submission de547b06-05ed-48f5-8276-a9f263772300.
In [76]:
terra.waitForSubmission(refwm, subid)
1.0 of jobs Succeeded in submission 0.sion 0. 7 mn elapsed.
Out[76]:
[]
In [77]:
realign = refwm.get_config(realign)
realign
Out[77]:
{'deleted': False,
 'inputs': {'PreProcessingForVariantDiscovery_GATK4.dbSNP_vcf': 'workspace.dbsnp_138',
  'PreProcessingForVariantDiscovery_GATK4.gatk_launch_path': '"/gatk/"',
  'PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.ref_amb': 'workspace.ref_bwa_amb',
  'PreProcessingForVariantDiscovery_GATK4.agg_preemptible_tries': '3',
  'PreProcessingForVariantDiscovery_GATK4.ref_fasta_index': 'workspace.ref_fasta_fai',
  'PreProcessingForVariantDiscovery_GATK4.GetBwaVersion.mem_size': '"8 GB"',
  'PreProcessingForVariantDiscovery_GATK4.unmapped_bam_suffix': '".bam"',
  'PreProcessingForVariantDiscovery_GATK4.ref_dict': 'workspace.ref_dict',
  'PreProcessingForVariantDiscovery_GATK4.SortAndFixTags.java_opt_sort': '"-Xms4000m"',
  'PreProcessingForVariantDiscovery_GATK4.gotc_docker': '"broadinstitute/genomes-in-the-cloud:2.3.0-1501082129"',
  'PreProcessingForVariantDiscovery_GATK4.picard_docker': '"broadinstitute/genomes-in-the-cloud:2.3.0-1501082129"',
  'PreProcessingForVariantDiscovery_GATK4.BaseRecalibrator.java_opt': '"-Xms4000m"',
  'PreProcessingForVariantDiscovery_GATK4.agg_medium_disk': '500',
  'PreProcessingForVariantDiscovery_GATK4.GatherBamFiles.mem_size': '"8 GB"',
  'PreProcessingForVariantDiscovery_GATK4.dbSNP_vcf_index': 'workspace.dbsnp_138_idx',
  'PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.mem_size': '"32 GB"',
  'PreProcessingForVariantDiscovery_GATK4.picard_path': '"/usr/gitc/"',
  'PreProcessingForVariantDiscovery_GATK4.ApplyBQSR.java_opt': '"-Xms3000m"',
  'PreProcessingForVariantDiscovery_GATK4.SortAndFixTags.java_opt_fix': '"-Xms500m"',
  'PreProcessingForVariantDiscovery_GATK4.gatk_docker': '"broadinstitute/gatk:4.beta.3"',
  'PreProcessingForVariantDiscovery_GATK4.flowcell_unmapped_bams_list': 'this.unmapped_bams_list',
  'PreProcessingForVariantDiscovery_GATK4.sample_name': 'this.sample_id',
  'PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.ref_alt': 'workspace.ref_bwa_alt',
  'PreProcessingForVariantDiscovery_GATK4.SortAndFixTags.mem_size': '"16 GB"',
  'PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.num_cpu': '"16"',
  'PreProcessingForVariantDiscovery_GATK4.agg_large_disk': '500',
  'PreProcessingForVariantDiscovery_GATK4.agg_small_disk': '300',
  'PreProcessingForVariantDiscovery_GATK4.GatherBqsrReports.java_opt': '"-Xms3000m"',
  'PreProcessingForVariantDiscovery_GATK4.MergeBamAlignment.mem_size': '"8 GB"',
  'PreProcessingForVariantDiscovery_GATK4.compression_level': '5',
  'PreProcessingForVariantDiscovery_GATK4.CreateSequenceGroupingTSV.mem_size': '"8 GB"',
  'PreProcessingForVariantDiscovery_GATK4.ref_name': 'workspace.ref_name',
  'PreProcessingForVariantDiscovery_GATK4.bwa_commandline': '"bwa mem -K 100000000 -p -v 3 -t 16 -Y $bash_ref_fasta"',
  'PreProcessingForVariantDiscovery_GATK4.MergeBamAlignment.java_opt': '"-Xms3000m"',
  'PreProcessingForVariantDiscovery_GATK4.ref_fasta': 'workspace.ref_fasta',
  'PreProcessingForVariantDiscovery_GATK4.GatherBqsrReports.mem_size': '"8 GB"',
  'PreProcessingForVariantDiscovery_GATK4.flowcell_medium_disk': '300',
  'PreProcessingForVariantDiscovery_GATK4.MarkDuplicates.mem_size': '"16 GB"',
  'PreProcessingForVariantDiscovery_GATK4.flowcell_small_disk': '300',
  'PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.java_opt': '"-Xms3000m"',
  'PreProcessingForVariantDiscovery_GATK4.known_indels_sites_VCFs': 'workspace.known_indels_array',
  'PreProcessingForVariantDiscovery_GATK4.ApplyBQSR.mem_size': '"8 GB"',
  'PreProcessingForVariantDiscovery_GATK4.known_indels_sites_indices': 'workspace.known_indels_idx_array',
  'PreProcessingForVariantDiscovery_GATK4.BaseRecalibrator.mem_size': '"8 GB"',
  'PreProcessingForVariantDiscovery_GATK4.preemptible_tries': '3',
  'PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.ref_sa': 'workspace.ref_bwa_sa',
  'PreProcessingForVariantDiscovery_GATK4.GatherBamFiles.java_opt': '"-Xms2000m"',
  'PreProcessingForVariantDiscovery_GATK4.python_docker': '"python:2.7"',
  'PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.ref_ann': 'workspace.ref_bwa_ann',
  'PreProcessingForVariantDiscovery_GATK4.MarkDuplicates.java_opt': '"-Xms4000m"',
  'PreProcessingForVariantDiscovery_GATK4.gotc_path': '"/usr/gitc/"',
  'PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.ref_bwt': 'workspace.ref_bwa_bwt',
  'PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.ref_pac': 'workspace.ref_bwa_pac'},
 'methodConfigVersion': 8,
 'methodRepoMethod': {'methodName': 'PreProcessingForVariantDiscovery_GATK4',
  'methodVersion': 7,
  'methodNamespace': 'gatk',
  'methodUri': 'agora://gatk/PreProcessingForVariantDiscovery_GATK4/7',
  'sourceRepo': 'agora'},
 'name': 'Realign_WES_GATK4',
 'namespace': 'gatk',
 'outputs': {'PreProcessingForVariantDiscovery_GATK4.bqsr_report': 'this.hg38_bqsr_report',
  'PreProcessingForVariantDiscovery_GATK4.duplication_metrics': 'this.hg38_duplication_metrics',
  'PreProcessingForVariantDiscovery_GATK4.analysis_ready_bam_md5': 'this.hg38_analysis_ready_bam_md5',
  'PreProcessingForVariantDiscovery_GATK4.analysis_ready_bam': 'this.hg38_analysis_ready_bam',
  'PreProcessingForVariantDiscovery_GATK4.analysis_ready_bam_index': 'this.hg38_analysis_ready_bam_index'},
 'prerequisites': {},
 'rootEntityType': 'sample'}
In [78]:
refwm.update_config(realign)
subid = refwm.create_submission(realign['name'],sample_set_id,"sample_set","this.samples")
Successfully updated configuration gatk/Realign_WES_GATK4
Successfully created submission ff0fbd27-8a5d-4760-8110-c9b464d23b66.
In [81]:
terra.waitForSubmission(refwm, subid)
status is: Failed for 0 jobs in submission 0. 99 mn elapsed.
----------------------------------------------------------
OSError                  Traceback (most recent call last)
/anaconda3/envs/py36/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    599                                                   body=body, headers=headers,
--> 600                                                   chunked=chunked)
    601 

/anaconda3/envs/py36/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    383                     # otherwise it looks like a programming error was the cause.
--> 384                     six.raise_from(e, None)
    385         except (SocketTimeout, BaseSSLError, SocketError) as e:

/anaconda3/envs/py36/lib/python3.6/site-packages/urllib3/packages/six.py in raise_from(value, from_value)

/anaconda3/envs/py36/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    379                 try:
--> 380                     httplib_response = conn.getresponse()
    381                 except Exception as e:

/anaconda3/envs/py36/lib/python3.6/http/client.py in getresponse(self)
   1330             try:
-> 1331                 response.begin()
   1332             except ConnectionError:

/anaconda3/envs/py36/lib/python3.6/http/client.py in begin(self)
    296         while True:
--> 297             version, status, reason = self._read_status()
    298             if status != CONTINUE:

/anaconda3/envs/py36/lib/python3.6/http/client.py in _read_status(self)
    257     def _read_status(self):
--> 258         line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
    259         if len(line) > _MAXLINE:

/anaconda3/envs/py36/lib/python3.6/socket.py in readinto(self, b)
    585             try:
--> 586                 return self._sock.recv_into(b)
    587             except timeout:

/anaconda3/envs/py36/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py in recv_into(self, *args, **kwargs)
    301             else:
--> 302                 raise SocketError(str(e))
    303         except OpenSSL.SSL.ZeroReturnError:

OSError: (60, 'ETIMEDOUT')

During handling of the above exception, another exception occurred:

ProtocolError            Traceback (most recent call last)
/anaconda3/envs/py36/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
    448                     retries=self.max_retries,
--> 449                     timeout=timeout
    450                 )

/anaconda3/envs/py36/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    637             retries = retries.increment(method, url, error=e, _pool=self,
--> 638                                         _stacktrace=sys.exc_info()[2])
    639             retries.sleep()

/anaconda3/envs/py36/lib/python3.6/site-packages/urllib3/util/retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
    367             if read is False or not self._is_method_retryable(method):
--> 368                 raise six.reraise(type(error), error, _stacktrace)
    369             elif read is not None:

/anaconda3/envs/py36/lib/python3.6/site-packages/urllib3/packages/six.py in reraise(tp, value, tb)
    684         if value.__traceback__ is not tb:
--> 685             raise value.with_traceback(tb)
    686         raise value

/anaconda3/envs/py36/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    599                                                   body=body, headers=headers,
--> 600                                                   chunked=chunked)
    601 

/anaconda3/envs/py36/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    383                     # otherwise it looks like a programming error was the cause.
--> 384                     six.raise_from(e, None)
    385         except (SocketTimeout, BaseSSLError, SocketError) as e:

/anaconda3/envs/py36/lib/python3.6/site-packages/urllib3/packages/six.py in raise_from(value, from_value)

/anaconda3/envs/py36/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    379                 try:
--> 380                     httplib_response = conn.getresponse()
    381                 except Exception as e:

/anaconda3/envs/py36/lib/python3.6/http/client.py in getresponse(self)
   1330             try:
-> 1331                 response.begin()
   1332             except ConnectionError:

/anaconda3/envs/py36/lib/python3.6/http/client.py in begin(self)
    296         while True:
--> 297             version, status, reason = self._read_status()
    298             if status != CONTINUE:

/anaconda3/envs/py36/lib/python3.6/http/client.py in _read_status(self)
    257     def _read_status(self):
--> 258         line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
    259         if len(line) > _MAXLINE:

/anaconda3/envs/py36/lib/python3.6/socket.py in readinto(self, b)
    585             try:
--> 586                 return self._sock.recv_into(b)
    587             except timeout:

/anaconda3/envs/py36/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py in recv_into(self, *args, **kwargs)
    301             else:
--> 302                 raise SocketError(str(e))
    303         except OpenSSL.SSL.ZeroReturnError:

ProtocolError: ('Connection aborted.', OSError("(60, 'ETIMEDOUT')",))

During handling of the above exception, another exception occurred:

ConnectionError          Traceback (most recent call last)
<ipython-input-81-b8781f508f0e> in <module>
----> 1 terra.waitForSubmission(refwm, subid)

~/Documents/Projects/BroadInstitute/JKBio/TerraFunction.py in waitForSubmission(wm, submissions)
     21       failed = 0
     22       finished=True
---> 23       for wcount, i in enumerate(wm.get_submission(submission_id)["workflows"]):
     24         if i['status'] not in {'Done', 'Aborted', 'Failed', 'Succeeded'}:
     25           finished=False

/anaconda3/envs/py36/lib/python3.6/site-packages/dalmatian/base.py in get_submission(self, submission_id)
    456     def get_submission(self, submission_id):
    457         """Get submission metadata"""
--> 458         r = firecloud.api.get_submission(self.namespace, self.workspace, submission_id)
    459         if r.status_code != 200:
    460             raise APIException(r)

/anaconda3/envs/py36/lib/python3.6/site-packages/firecloud/api.py in get_submission(namespace, workspace, submission_id)
   1084     uri = "workspaces/{0}/{1}/submissions/{2}".format(namespace,
   1085                                             workspace, submission_id)
-> 1086     return __get(uri)
   1087 
   1088 def get_workflow_metadata(namespace, workspace, submission_id, workflow_id):

/anaconda3/envs/py36/lib/python3.6/site-packages/firecloud/api.py in __get(methcall, headers, root_url, **kwargs)
     90     if not headers:
     91         headers = _fiss_agent_header()
---> 92     r = __SESSION.get(urljoin(root_url, methcall), headers=headers, **kwargs)
     93     if fcconfig.verbosity > 1:
     94         print('FISSFC call: %s' % r.url, file=sys.stderr)

/anaconda3/envs/py36/lib/python3.6/site-packages/requests/sessions.py in get(self, url, **kwargs)
    544 
    545         kwargs.setdefault('allow_redirects', True)
--> 546         return self.request('GET', url, **kwargs)
    547 
    548     def options(self, url, **kwargs):

/anaconda3/envs/py36/lib/python3.6/site-packages/dalmatian/wmanager.py in _firecloud_api_timeout_wrapper(*args, **kwargs)
     73         **{
     74             **{'timeout': timeout_state.timeout},
---> 75             **kwargs
     76         }
     77     )

/anaconda3/envs/py36/lib/python3.6/site-packages/google/auth/transport/requests.py in request(self, method, url, data, headers, **kwargs)
    206 
    207         response = super(AuthorizedSession, self).request(
--> 208             method, url, data=data, headers=request_headers, **kwargs)
    209 
    210         # If the response indicated that the credentials needed to be

/anaconda3/envs/py36/lib/python3.6/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
    531         }
    532         send_kwargs.update(settings)
--> 533         resp = self.send(prep, **send_kwargs)
    534 
    535         return resp

/anaconda3/envs/py36/lib/python3.6/site-packages/requests/sessions.py in send(self, request, **kwargs)
    644 
    645         # Send the request
--> 646         r = adapter.send(request, **kwargs)
    647 
    648         # Total elapsed time of the request (approximately)

/anaconda3/envs/py36/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
    496 
    497         except (ProtocolError, socket.error) as err:
--> 498             raise ConnectionError(err, request=request)
    499 
    500         except MaxRetryError as e:

ConnectionError: ('Connection aborted.', OSError("(60, 'ETIMEDOUT')",))
In [82]:
CNV_woXY = refwm.get_config("CNV_sample_XX")
CNV_woXY
Out[82]:
{'deleted': False,
 'inputs': {'CNVSomaticPairWorkflow.preemptible_attempts': '5',
  'CNVSomaticPairWorkflow.oncotator_docker': '',
  'CNVSomaticPairWorkflow.mem_gb_for_call_copy_ratio_segments': '',
  'CNVSomaticPairWorkflow.num_smoothing_iterations_per_fit': '',
  'CNVSomaticPairWorkflow.ModelSegmentsNormal.output_dir': '',
  'CNVSomaticPairWorkflow.PlotModeledSegmentsTumor.output_dir': '',
  'CNVSomaticPairWorkflow.calling_copy_ratio_z_score_threshold': '',
  'CNVSomaticPairWorkflow.minor_allele_fraction_prior_alpha': '',
  'CNVSomaticPairWorkflow.ModelSegmentsTumor.output_dir': '',
  'CNVSomaticPairWorkflow.gatk_docker': 'workspace.gatk_docker',
  'CNVSomaticPairWorkflow.num_changepoints_penalty_factor': '',
  'CNVSomaticPairWorkflow.common_sites': 'workspace.common_sites_hg38',
  'CNVSomaticPairWorkflow.tumor_bam_idx': 'this.hg38_analysis_ready_bam_index',
  'CNVSomaticPairWorkflow.PlotModeledSegmentsNormal.cpu': '',
  'CNVSomaticPairWorkflow.mem_gb_for_oncotator': '',
  'CNVSomaticPairWorkflow.neutral_segment_copy_ratio_upper_bound': '',
  'CNVSomaticPairWorkflow.minimum_base_quality': '',
  'CNVSomaticPairWorkflow.mem_gb_for_denoise_read_counts': '',
  'CNVSomaticPairWorkflow.genotyping_base_error_rate': '',
  'CNVSomaticPairWorkflow.emergency_extra_disk': '',
  'CNVSomaticPairWorkflow.ModelSegmentsNormal.cpu': '',
  'CNVSomaticPairWorkflow.CallCopyRatioSegmentsTumor.cpu': '',
  'CNVSomaticPairWorkflow.ModelSegmentsNormal.normal_allelic_counts': '',
  'CNVSomaticPairWorkflow.PlotModeledSegmentsNormal.output_dir': '',
  'CNVSomaticPairWorkflow.ref_fasta_fai': 'workspace.ref_fasta_fai',
  'CNVSomaticPairWorkflow.CollectCountsTumor.cpu': '',
  'CNVSomaticPairWorkflow.kernel_approximation_dimension': '',
  'CNVSomaticPairWorkflow.outlier_neutral_segment_copy_ratio_z_score_threshold': '',
  'CNVSomaticPairWorkflow.kernel_variance_copy_ratio': '',
  'CNVSomaticPairWorkflow.additional_args_for_oncotator': '',
  'CNVSomaticPairWorkflow.format': '',
  'CNVSomaticPairWorkflow.mem_gb_for_model_segments': '',
  'CNVSomaticPairWorkflow.mem_gb_for_plotting': '',
  'CNVSomaticPairWorkflow.min_total_allele_count': '',
  'CNVSomaticPairWorkflow.ref_fasta': 'workspace.ref_fasta',
  'CNVSomaticPairWorkflow.num_burn_in_allele_fraction': '',
  'CNVSomaticPairWorkflow.smoothing_threshold_allele_fraction': '',
  'CNVSomaticPairWorkflow.ModelSegmentsTumor.cpu': '',
  'CNVSomaticPairWorkflow.mem_gb_for_collect_counts': '',
  'CNVSomaticPairWorkflow.ref_fasta_dict': 'workspace.ref_dict',
  'CNVSomaticPairWorkflow.minimum_contig_length': '',
  'CNVSomaticPairWorkflow.smoothing_threshold_copy_ratio': '',
  'CNVSomaticPairWorkflow.PlotDenoisedCopyRatiosNormal.output_dir': '',
  'CNVSomaticPairWorkflow.tumor_bam': 'this.hg38_analysis_ready_bam',
  'CNVSomaticPairWorkflow.PlotDenoisedCopyRatiosTumor.cpu': '',
  'CNVSomaticPairWorkflow.normal_bam_idx': '',
  'CNVSomaticPairWorkflow.CollectAllelicCountsNormal.cpu': '',
  'CNVSomaticPairWorkflow.neutral_segment_copy_ratio_lower_bound': '',
  'CNVSomaticPairWorkflow.num_samples_allele_fraction': '',
  'CNVSomaticPairWorkflow.max_num_segments_per_chromosome': '',
  'CNVSomaticPairWorkflow.blacklist_intervals': '',
  'CNVSomaticPairWorkflow.kernel_scaling_allele_fraction': '',
  'CNVSomaticPairWorkflow.PlotModeledSegmentsTumor.cpu': '',
  'CNVSomaticPairWorkflow.mem_gb_for_preprocess_intervals': '',
  'CNVSomaticPairWorkflow.genotyping_homozygous_log_ratio_threshold': '',
  'CNVSomaticPairWorkflow.max_num_smoothing_iterations': '',
  'CNVSomaticPairWorkflow.PreprocessIntervals.cpu': '',
  'CNVSomaticPairWorkflow.PlotDenoisedCopyRatiosTumor.output_dir': '',
  'CNVSomaticPairWorkflow.is_run_oncotator': '',
  'CNVSomaticPairWorkflow.padding': '250',
  'CNVSomaticPairWorkflow.mem_gb_for_collect_allelic_counts': '',
  'CNVSomaticPairWorkflow.normal_bam': '',
  'CNVSomaticPairWorkflow.CollectCountsNormal.cpu': '',
  'CNVSomaticPairWorkflow.num_burn_in_copy_ratio': '',
  'CNVSomaticPairWorkflow.PlotDenoisedCopyRatiosNormal.cpu': '',
  'CNVSomaticPairWorkflow.DenoiseReadCountsNormal.cpu': '',
  'CNVSomaticPairWorkflow.CollectAllelicCountsTumor.cpu': '',
  'CNVSomaticPairWorkflow.intervals': 'workspace.ice_xx_intervals_no_pad',
  'CNVSomaticPairWorkflow.gatk4_jar_override': '',
  'CNVSomaticPairWorkflow.CallCopyRatioSegmentsNormal.cpu': '',
  'CNVSomaticPairWorkflow.DenoiseReadCountsTumor.cpu': '',
  'CNVSomaticPairWorkflow.number_of_eigensamples': '',
  'CNVSomaticPairWorkflow.window_sizes': '',
  'CNVSomaticPairWorkflow.num_samples_copy_ratio': '',
  'CNVSomaticPairWorkflow.read_count_pon': 'workspace.ice_pon_xx',
  'CNVSomaticPairWorkflow.bin_length': '0',
  'CNVSomaticPairWorkflow.kernel_variance_allele_fraction': ''},
 'methodConfigVersion': 7,
 'methodRepoMethod': {'methodName': 'CNV_Somatic_Pair_Workflow',
  'methodVersion': 9,
  'methodNamespace': 'gatk',
  'methodUri': 'agora://gatk/CNV_Somatic_Pair_Workflow/9',
  'sourceRepo': 'agora'},
 'name': 'CNV_sample_XX',
 'namespace': 'gatk',
 'outputs': {'CNVSomaticPairWorkflow.copy_ratio_legacy_segments_normal': 'this.copy_ratio_legacy_segments_normal',
  'CNVSomaticPairWorkflow.het_allelic_counts_normal': 'this.het_allelic_counts_normal',
  'CNVSomaticPairWorkflow.allelic_counts_normal': 'this.allelic_counts_normal',
  'CNVSomaticPairWorkflow.copy_ratio_parameters_normal': 'this.copy_ratio_parameters_normal',
  'CNVSomaticPairWorkflow.allele_fraction_legacy_segments_normal': 'this.allele_fraction_legacy_segments_normal',
  'CNVSomaticPairWorkflow.normal_het_allelic_counts_normal': 'this.normal_het_allelic_counts_normal',
  'CNVSomaticPairWorkflow.allele_fraction_parameters_normal': 'this.allele_fraction_parameters_normal',
  'CNVSomaticPairWorkflow.modeled_segments_begin_tumor': 'this.modeled_segments_begin_tumor',
  'CNVSomaticPairWorkflow.copy_ratio_parameters_begin_normal': 'this.copy_ratio_parameters_begin_normal',
  'CNVSomaticPairWorkflow.copy_ratio_parameters_tumor': 'this.copy_ratio_parameters_tumor',
  'CNVSomaticPairWorkflow.denoised_MAD_normal': 'this.denoised_MAD_normal',
  'CNVSomaticPairWorkflow.scaled_delta_MAD_tumor': 'this.scaled_delta_MAD_tumor',
  'CNVSomaticPairWorkflow.het_allelic_counts_tumor': 'this.het_allelic_counts_tumor',
  'CNVSomaticPairWorkflow.scaled_delta_MAD_normal': 'this.scaled_delta_MAD_normal',
  'CNVSomaticPairWorkflow.standardized_MAD_tumor': 'this.standardized_MAD_tumor',
  'CNVSomaticPairWorkflow.denoised_copy_ratios_plot_tumor': 'this.denoised_copy_ratios_plot_tumor',
  'CNVSomaticPairWorkflow.copy_ratio_only_segments_tumor': 'this.copy_ratio_only_segments_tumor',
  'CNVSomaticPairWorkflow.oncotated_called_gene_list_file_tumor': 'this.oncotated_called_gene_list_file_tumor',
  'CNVSomaticPairWorkflow.denoised_copy_ratios_normal': 'this.denoised_copy_ratios_normal',
  'CNVSomaticPairWorkflow.called_copy_ratio_segments_normal': 'this.called_copy_ratio_segments_normal',
  'CNVSomaticPairWorkflow.denoised_copy_ratios_lim_4_plot_normal': 'this.denoised_copy_ratios_lim_4_plot_normal',
  'CNVSomaticPairWorkflow.oncotated_called_file_tumor': 'this.oncotated_called_file_tumor',
  'CNVSomaticPairWorkflow.allele_fraction_parameters_begin_normal': 'this.allele_fraction_parameters_begin_normal',
  'CNVSomaticPairWorkflow.delta_MAD_normal': 'this.delta_MAD_normal',
  'CNVSomaticPairWorkflow.denoised_MAD_tumor': 'this.denoised_MAD_tumor',
  'CNVSomaticPairWorkflow.normal_het_allelic_counts_tumor': 'this.normal_het_allelic_counts_tumor',
  'CNVSomaticPairWorkflow.copy_ratio_only_segments_normal': 'this.copy_ratio_only_segments_normal',
  'CNVSomaticPairWorkflow.modeled_segments_begin_normal': 'this.modeled_segments_begin_normal',
  'CNVSomaticPairWorkflow.denoised_copy_ratios_tumor': 'this.denoised_copy_ratios_tumor',
  'CNVSomaticPairWorkflow.denoised_copy_ratios_lim_4_plot_tumor': 'this.denoised_copy_ratios_lim_4_plot_tumor',
  'CNVSomaticPairWorkflow.allelic_counts_entity_id_normal': 'this.allelic_counts_entity_id_normal',
  'CNVSomaticPairWorkflow.modeled_segments_tumor': 'this.modeled_segments_tumor',
  'CNVSomaticPairWorkflow.allele_fraction_parameters_begin_tumor': 'this.allele_fraction_parameters_begin_tumor',
  'CNVSomaticPairWorkflow.allelic_counts_tumor': 'this.allelic_counts_tumor',
  'CNVSomaticPairWorkflow.read_counts_entity_id_tumor': 'this.read_counts_entity_id_tumor',
  'CNVSomaticPairWorkflow.standardized_MAD_normal': 'this.standardized_MAD_normal',
  'CNVSomaticPairWorkflow.delta_MAD_tumor': 'this.delta_MAD_tumor',
  'CNVSomaticPairWorkflow.read_counts_normal': 'this.read_counts_normal',
  'CNVSomaticPairWorkflow.read_counts_tumor': 'this.read_counts_tumor',
  'CNVSomaticPairWorkflow.modeled_segments_plot_normal': 'this.modeled_segments_plot_normal',
  'CNVSomaticPairWorkflow.modeled_segments_plot_tumor': 'this.modeled_segments_plot_tumor',
  'CNVSomaticPairWorkflow.denoised_copy_ratios_plot_normal': 'this.denoised_copy_ratios_plot_normal',
  'CNVSomaticPairWorkflow.modeled_segments_normal': 'this.modeled_segments_normal',
  'CNVSomaticPairWorkflow.allele_fraction_legacy_segments_tumor': 'this.allele_fraction_legacy_segments_tumor',
  'CNVSomaticPairWorkflow.allelic_counts_entity_id_tumor': 'this.allelic_counts_entity_id_tumor',
  'CNVSomaticPairWorkflow.copy_ratio_legacy_segments_tumor': 'this.copy_ratio_legacy_segments_tumor',
  'CNVSomaticPairWorkflow.standardized_copy_ratios_normal': 'this.standardized_copy_ratios_normal',
  'CNVSomaticPairWorkflow.called_copy_ratio_segments_tumor': 'this.called_copy_ratio_segments_tumor',
  'CNVSomaticPairWorkflow.read_counts_entity_id_normal': 'this.read_counts_entity_id_normal',
  'CNVSomaticPairWorkflow.preprocessed_intervals': 'this.preprocessed_intervals',
  'CNVSomaticPairWorkflow.standardized_copy_ratios_tumor': 'this.standardized_copy_ratios_tumor',
  'CNVSomaticPairWorkflow.allele_fraction_parameters_tumor': 'this.allele_fraction_parameters_tumor',
  'CNVSomaticPairWorkflow.copy_ratio_parameters_begin_tumor': 'this.copy_ratio_parameters_begin_tumor'},
 'prerequisites': {},
 'rootEntityType': 'sample'}
In [83]:
refwm.update_config(CNV_woXY)
submission_id = refwm.create_submission(CNV_woXY['name'],etype='sample_set',entity=sample_set_id,expression='this.samples')
Successfully updated configuration gatk/CNV_sample_XX
Successfully created submission 7bb83274-49d1-463c-9a4c-d120110fd42b.
In [84]:
terra.waitForSubmission(refwm,submission_id)
1.0 of jobs Succeeded in submission 0.sion 0. 70 mn elapsed.
Out[84]:
[]
In [85]:
aggregate = refwm.get_config("Aggregate_CN_seg_files")
aggregate
Out[85]:
{'deleted': False,
 'inputs': {'aggregate_CN_segments_wrkflw.aggregate_CN_segments.disk_space': '10',
  'aggregate_CN_segments_wrkflw.aggregate_CN_segments.num_preempt': '5',
  'aggregate_CN_segments_wrkflw.aggregate_CN_segments.aggregate_seg_files_script': 'workspace.cn_single_file_script',
  'aggregate_CN_segments_wrkflw.aggregate_CN_segments.memory': '5',
  'aggregate_CN_segments_wrkflw.aggregate_CN_segments.sample_set_id': 'this.name',
  'aggregate_CN_segments_wrkflw.aggregate_CN_segments.sample_seg_files': 'this.samples.called_copy_ratio_segments_tumor'},
 'methodConfigVersion': 4,
 'methodRepoMethod': {'methodName': 'Aggregate_CN_seg_files',
  'methodVersion': 2,
  'methodNamespace': 'gkugener',
  'methodUri': 'agora://gkugener/Aggregate_CN_seg_files/2',
  'sourceRepo': 'agora'},
 'name': 'Aggregate_CN_seg_files',
 'namespace': 'gkugener',
 'outputs': {'aggregate_CN_segments_wrkflw.aggregate_CN_segments.combined_cn_file': 'this.combined_seg_file'},
 'prerequisites': {},
 'rootEntityType': 'sample_set'}
In [86]:
submission_id = refwm.create_submission(aggregate['name'],entity=sample_set_id)
Successfully created submission 5c76b219-404f-479a-9b7c-7cf5699fc161.
In [87]:
terra.waitForSubmission(refwm,submission_id)
1.0 of jobs Succeeded in submission 0.sion 0. 1 mn elapsed.
Out[87]:
[]
In [88]:
aggregated = refwm.get_entities('sample_set').loc[sample_set_id]["combined_seg_file"]
aggregated
Out[88]:
'gs://fc-secure-d2a2d895-a7af-4117-bdc7-652d7d268324/5c76b219-404f-479a-9b7c-7cf5699fc161/aggregate_CN_segments_wrkflw/c0119f63-73d2-4f47-a6ec-88f24f74f3f2/call-aggregate_CN_segments/19Q3.called.seg'
In [89]:
! gsutil cp $aggregated "temp/cnv_ccle.called.seg"

Updates are available for some Cloud SDK components.  To install them,
please run:
  $ gcloud components update

Copying gs://fc-secure-d2a2d895-a7af-4117-bdc7-652d7d268324/5c76b219-404f-479a-9b7c-7cf5699fc161/aggregate_CN_segments_wrkflw/c0119f63-73d2-4f47-a6ec-88f24f74f3f2/call-aggregate_CN_segments/19Q3.called.seg...
- [1 files][ 67.2 KiB/ 67.2 KiB]                                                
Operation completed over 1 objects/67.2 KiB.                                     
In [90]:
%%R
source('../JKBio/gkugener/RScripts/load_libraries_and_annotations.R')
2019-07-05 11:45:25::WARNING  R[write to console]: 
Attachement du package : ‘dplyr’


2019-07-05 11:45:25::WARNING  R[write to console]: The following objects are masked from ‘package:plyr’:

    arrange, count, desc, failwith, id, mutate, rename, summarise,
    summarize


2019-07-05 11:45:25::WARNING  R[write to console]: The following objects are masked from ‘package:stats’:

    filter, lag


2019-07-05 11:45:25::WARNING  R[write to console]: The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


2019-07-05 11:45:26::WARNING  R[write to console]: ── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──

2019-07-05 11:45:26::WARNING  R[write to console]: ✔ ggplot2 3.2.0     ✔ readr   1.3.1
✔ tibble  2.1.3     ✔ purrr   0.3.2
✔ tidyr   0.8.3     ✔ stringr 1.4.0
✔ ggplot2 3.2.0     ✔ forcats 0.4.0

2019-07-05 11:45:26::WARNING  R[write to console]: ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::arrange()   masks plyr::arrange()
✖ purrr::compact()   masks plyr::compact()
✖ dplyr::count()     masks plyr::count()
✖ dplyr::failwith()  masks plyr::failwith()
✖ dplyr::filter()    masks stats::filter()
✖ dplyr::id()        masks plyr::id()
✖ dplyr::lag()       masks stats::lag()
✖ dplyr::mutate()    masks plyr::mutate()
✖ dplyr::rename()    masks plyr::rename()
✖ dplyr::summarise() masks plyr::summarise()
✖ dplyr::summarize() masks plyr::summarize()

2019-07-05 11:45:26::WARNING  R[write to console]: 
Attachement du package : ‘magrittr’


2019-07-05 11:45:26::WARNING  R[write to console]: The following object is masked from ‘package:purrr’:

    set_names


2019-07-05 11:45:26::WARNING  R[write to console]: The following object is masked from ‘package:tidyr’:

    extract


2019-07-05 11:45:26::WARNING  R[write to console]: 
Attachement du package : ‘reshape2’


2019-07-05 11:45:26::WARNING  R[write to console]: The following object is masked from ‘package:tidyr’:

    smiths


2019-07-05 11:45:26::WARNING  R[write to console]: 
Attachement du package : ‘gridExtra’


2019-07-05 11:45:26::WARNING  R[write to console]: The following object is masked from ‘package:dplyr’:

    combine


2019-07-05 11:45:26::WARNING  R[write to console]: 
Attachement du package : ‘ggridges’


2019-07-05 11:45:26::WARNING  R[write to console]: The following object is masked from ‘package:ggplot2’:

    scale_discrete_manual


2019-07-05 11:45:26::WARNING  R[write to console]: Registered S3 method overwritten by 'GGally':
  method from   
  +.gg   ggplot2

2019-07-05 11:45:26::WARNING  R[write to console]: 
Attachement du package : ‘GGally’


2019-07-05 11:45:26::WARNING  R[write to console]: The following object is masked from ‘package:dplyr’:

    nasa


2019-07-05 11:45:26::WARNING  R[write to console]: 
Attachement du package : ‘plotly’


2019-07-05 11:45:26::WARNING  R[write to console]: The following object is masked from ‘package:ggplot2’:

    last_plot


2019-07-05 11:45:26::WARNING  R[write to console]: The following objects are masked from ‘package:plyr’:

    arrange, mutate, rename, summarise


2019-07-05 11:45:26::WARNING  R[write to console]: The following object is masked from ‘package:stats’:

    filter


2019-07-05 11:45:26::WARNING  R[write to console]: The following object is masked from ‘package:graphics’:

    layout


2019-07-05 11:45:26::WARNING  R[write to console]: Le chargement a nécessité le package : grid

2019-07-05 11:45:26::WARNING  R[write to console]: Le chargement a nécessité le package : futile.logger

2019-07-05 11:45:27::WARNING  R[write to console]: Registering fonts with R

2019-07-05 11:45:27::WARNING  R[write to console]: 
Attachement du package : ‘cowplot’


2019-07-05 11:45:27::WARNING  R[write to console]: The following object is masked from ‘package:ggthemes’:

    theme_map


2019-07-05 11:45:27::WARNING  R[write to console]: The following object is masked from ‘package:ggplot2’:

    ggsave


2019-07-05 11:45:27::WARNING  R[write to console]: Error in library(networkD3) : 
  aucun package nommé ‘networkD3’ n'est trouvé
Calls: <Anonymous> ... withVisible -> source -> withVisible -> eval -> eval -> library

Error in library(networkD3) : 
  aucun package nommé ‘networkD3’ n'est trouvé
Calls: <Anonymous> ... withVisible -> source -> withVisible -> eval -> eval -> library
In [144]:
%%R
source("CCLE_postp_function.R")
genome_version <- 'hg38'
release <- '19Q3'
hg38_cyto_band_reference <- 'data/hg38_cytoband.gz'
new_samples_copy_number_broad_wes <- 'temp/cnv_ccle.called.seg'
In [91]:
%%R
# Previous release copy number profiles. This line will need to be updated as well
wes.priority.cn.seg.profiles <- taigr::load.from.taiga(data.name='segmented-cn-wes-prioritzed-7fe1', data.file='wes.19Q3interim.segmented') %>%
  dplyr::select(DepMap_ID, Chromosome, Start, End, Num_Probes, Segment_Mean, Source)
wes.priority.cn.gene.profiles <- taigr::load.from.taiga(data.name='segmented-cn-wes-prioritzed-7fe1', data.file='wes.19Q3interim.gene')
Fetching https://cds.team/taiga/api/datafile?format=metadata&dataset_permaname=segmented-cn-wes-prioritzed-7fe1&datafile_name=wes.19Q3interim.segmented 
Status 200 
2019-07-05 11:46:35::WARNING  R[write to console]: No encoding supplied: defaulting to UTF-8.

Could not find segmented-cn-wes-prioritzed-7fe1 in cache, requesting from taiga...
Fetching https://cds.team/taiga/api/datafile?format=rds&dataset_version_id=08b499f2757143d8addd9c6c89be643e&datafile_name=wes.19Q3interim.segmented 
Status 200 
2019-07-05 11:46:35::WARNING  R[write to console]: No encoding supplied: defaulting to UTF-8.

Taiga needs to convert data to rds before we can fetch it.  Waiting...
Conversion pending 
Fetching https://cds.team/taiga/api/datafile?format=rds&dataset_version_id=08b499f2757143d8addd9c6c89be643e&datafile_name=wes.19Q3interim.segmented 
Status 200 
2019-07-05 11:46:36::WARNING  R[write to console]: No encoding supplied: defaulting to UTF-8.

Running conversion 
Fetching https://cds.team/taiga/api/datafile?format=rds&dataset_version_id=08b499f2757143d8addd9c6c89be643e&datafile_name=wes.19Q3interim.segmented 
Status 200 
2019-07-05 11:46:38::WARNING  R[write to console]: No encoding supplied: defaulting to UTF-8.

Fetching https://cds.team/taiga/api/datafile?format=rds&dataset_version_id=08b499f2757143d8addd9c6c89be643e&datafile_name=wes.19Q3interim.segmented 
Status 200 
2019-07-05 11:46:40::WARNING  R[write to console]: No encoding supplied: defaulting to UTF-8.

Fetching https://cds.team/taiga/api/datafile?format=rds&dataset_version_id=08b499f2757143d8addd9c6c89be643e&datafile_name=wes.19Q3interim.segmented 
Status 200 
2019-07-05 11:46:43::WARNING  R[write to console]: No encoding supplied: defaulting to UTF-8.

Fetching https://cds.team/taiga/api/datafile?format=rds&dataset_version_id=08b499f2757143d8addd9c6c89be643e&datafile_name=wes.19Q3interim.segmented 
Status 200 
2019-07-05 11:46:49::WARNING  R[write to console]: No encoding supplied: defaulting to UTF-8.

2019-07-05 11:46:49::WARNING  R[write to console]: Downloading segmented-cn-wes-prioritzed-7fe1/v28/wes.19Q3interim.segmented ...

  |======================================================================| 100%
Saving segmented-cn-wes-prioritzed-7fe1 in cache ( 08b499f2757143d8addd9c6c89be643e wes.19Q3interim.segmented )...
2019-07-05 11:46:54::WARNING  R[write to console]: Saved to cache as 08b499f2757143d8addd9c6c89be643e_wes-19q3interim-segmented.rds

writing /Users/jeremie/.taiga/08b499f2757143d8addd9c6c89be643e_wes-19q3interim-segmented.idx /Users/jeremie/.taiga/segmented-cn-wes-prioritzed-7fe1_wes-19q3interim-segmented_28.idx 
Fetching https://cds.team/taiga/api/datafile?format=metadata&dataset_permaname=segmented-cn-wes-prioritzed-7fe1&datafile_name=wes.19Q3interim.gene 
Status 200 
2019-07-05 11:46:54::WARNING  R[write to console]: No encoding supplied: defaulting to UTF-8.

Could not find segmented-cn-wes-prioritzed-7fe1 in cache, requesting from taiga...
Fetching https://cds.team/taiga/api/datafile?format=rds&dataset_version_id=08b499f2757143d8addd9c6c89be643e&datafile_name=wes.19Q3interim.gene 
Status 200 
2019-07-05 11:46:54::WARNING  R[write to console]: No encoding supplied: defaulting to UTF-8.

Taiga needs to convert data to rds before we can fetch it.  Waiting...
Conversion pending 
Fetching https://cds.team/taiga/api/datafile?format=rds&dataset_version_id=08b499f2757143d8addd9c6c89be643e&datafile_name=wes.19Q3interim.gene 
Status 200 
2019-07-05 11:46:56::WARNING  R[write to console]: No encoding supplied: defaulting to UTF-8.

Downloading from S3 
Fetching https://cds.team/taiga/api/datafile?format=rds&dataset_version_id=08b499f2757143d8addd9c6c89be643e&datafile_name=wes.19Q3interim.gene 
Status 200 
2019-07-05 11:46:57::WARNING  R[write to console]: No encoding supplied: defaulting to UTF-8.

Fetching https://cds.team/taiga/api/datafile?format=rds&dataset_version_id=08b499f2757143d8addd9c6c89be643e&datafile_name=wes.19Q3interim.gene 
Status 200 
2019-07-05 11:47:00::WARNING  R[write to console]: No encoding supplied: defaulting to UTF-8.

Running conversion 
Fetching https://cds.team/taiga/api/datafile?format=rds&dataset_version_id=08b499f2757143d8addd9c6c89be643e&datafile_name=wes.19Q3interim.gene 
Status 200 
2019-07-05 11:47:03::WARNING  R[write to console]: No encoding supplied: defaulting to UTF-8.

Fetching https://cds.team/taiga/api/datafile?format=rds&dataset_version_id=08b499f2757143d8addd9c6c89be643e&datafile_name=wes.19Q3interim.gene 
Status 200 
2019-07-05 11:47:08::WARNING  R[write to console]: No encoding supplied: defaulting to UTF-8.

Fetching https://cds.team/taiga/api/datafile?format=rds&dataset_version_id=08b499f2757143d8addd9c6c89be643e&datafile_name=wes.19Q3interim.gene 
Status 200 
2019-07-05 11:47:16::WARNING  R[write to console]: No encoding supplied: defaulting to UTF-8.

Fetching https://cds.team/taiga/api/datafile?format=rds&dataset_version_id=08b499f2757143d8addd9c6c89be643e&datafile_name=wes.19Q3interim.gene 
Status 200 
2019-07-05 11:47:26::WARNING  R[write to console]: No encoding supplied: defaulting to UTF-8.

2019-07-05 11:47:26::WARNING  R[write to console]: Downloading segmented-cn-wes-prioritzed-7fe1/v28/wes.19Q3interim.gene ...

  |======================================================================| 100%
2019-07-05 11:47:47::WARNING  R[write to console]: Downloading segmented-cn-wes-prioritzed-7fe1/v28/wes.19Q3interim.gene ...

  |======================================================================| 100%
2019-07-05 11:49:28::WARNING  R[write to console]: Downloading segmented-cn-wes-prioritzed-7fe1/v28/wes.19Q3interim.gene ...

  |======================================================================| 100%
2019-07-05 11:49:43::WARNING  R[write to console]: Downloading segmented-cn-wes-prioritzed-7fe1/v28/wes.19Q3interim.gene ...

  |======================================================================| 100%
2019-07-05 11:49:57::WARNING  R[write to console]: Downloading segmented-cn-wes-prioritzed-7fe1/v28/wes.19Q3interim.gene ...

  |======================================================================| 100%
2019-07-05 11:50:11::WARNING  R[write to console]: Downloading segmented-cn-wes-prioritzed-7fe1/v28/wes.19Q3interim.gene ...

  |======================================================================| 100%
2019-07-05 11:50:24::WARNING  R[write to console]: Downloading segmented-cn-wes-prioritzed-7fe1/v28/wes.19Q3interim.gene ...

  |======================================================================| 100%
2019-07-05 11:50:34::WARNING  R[write to console]: Downloading segmented-cn-wes-prioritzed-7fe1/v28/wes.19Q3interim.gene ...

  |======================================================================| 100%
Saving segmented-cn-wes-prioritzed-7fe1 in cache ( 08b499f2757143d8addd9c6c89be643e wes.19Q3interim.gene )...
2019-07-05 11:51:04::WARNING  R[write to console]: Saved to cache as 08b499f2757143d8addd9c6c89be643e_wes-19q3interim-gene.rds

writing /Users/jeremie/.taiga/08b499f2757143d8addd9c6c89be643e_wes-19q3interim-gene.idx /Users/jeremie/.taiga/segmented-cn-wes-prioritzed-7fe1_wes-19q3interim-gene_28.idx 
In [113]:
%%R
segments <- processSegments(new_samples_copy_number_broad_wes)
segments <- filterForCCLE(segments)
segments <- interpolateGapsInSegmented(segments)$segs
segments <- extendEndsOfSegments(segments,'../JKBio/data/hg38_cytoband.gz')
print(segments)
print(head(wes.priority.cn.seg.profiles))
segments_unfiltered <- reprioritizeData(segments, wes.priority.cn.seg.profiles)
# Fill in the gaps on the entire dataset
# Extend start sites to 1, end sites to the end of the chromosome?
[1] 0
2019-07-05 12:10:13::WARNING  R[write to console]: Parsed with column specification:
cols(
  X1 = col_character(),
  X2 = col_double(),
  X3 = col_double(),
  X4 = col_character(),
  X5 = col_character()
)

# A tibble: 1,276 x 7
   DepMap_ID      seqnames    start       end Num_Probes Segment_Mean Source   
   <chr>          <fct>       <dbl>     <dbl>      <dbl>        <dbl> <chr>    
 1 ibm_ACH-000658 chr1            1  12785414       1710        0.880 Broad WES
 2 ibm_ACH-000658 chr1     12785415  13084566         19        1.96  Broad WES
 3 ibm_ACH-000658 chr1     13084567  16448617        336        0.902 Broad WES
 4 ibm_ACH-000658 chr1     16448618  16976116         65        1.58  Broad WES
 5 ibm_ACH-000658 chr1     16976117  17085179         50        0.791 Broad WES
 6 ibm_ACH-000658 chr1     17085180  33327415       2124        0.891 Broad WES
 7 ibm_ACH-000658 chr1     33327416  33608515         47        1.12  Broad WES
 8 ibm_ACH-000658 chr1     33608516  85736977       3854        0.889 Broad WES
 9 ibm_ACH-000658 chr1     85736978  86120112         54        1.22  Broad WES
10 ibm_ACH-000658 chr1     86120113 111627657       1569        0.895 Broad WES
# … with 1,266 more rows
   DepMap_ID Chromosome    Start      End Num_Probes Segment_Mean     Source
1 ACH-000001          1        1  1969745        286     2.546065 Sanger WES
2 ACH-000001          1  1969746  6354345        365     2.175759 Sanger WES
3 ACH-000001          1  6354346  6958256        100     3.109430 Sanger WES
4 ACH-000001          1  6958257 15977206        884     2.134831 Sanger WES
5 ACH-000001          1 15977207 16174774         57     2.952592 Sanger WES
6 ACH-000001          1 16174775 16275770         19     1.710129 Sanger WES
# A tibble: 1,276 x 7
   DepMap_ID      Chromosome    Start       End Num_Probes Segment_Mean Source  
   <chr>          <fct>         <dbl>     <dbl>      <dbl>        <dbl> <chr>   
 1 ibm_ACH-000658 chr1              1  12785414       1710        0.880 Broad W…
 2 ibm_ACH-000658 chr1       12785415  13084566         19        1.96  Broad W…
 3 ibm_ACH-000658 chr1       13084567  16448617        336        0.902 Broad W…
 4 ibm_ACH-000658 chr1       16448618  16976116         65        1.58  Broad W…
 5 ibm_ACH-000658 chr1       16976117  17085179         50        0.791 Broad W…
 6 ibm_ACH-000658 chr1       17085180  33327415       2124        0.891 Broad W…
 7 ibm_ACH-000658 chr1       33327416  33608515         47        1.12  Broad W…
 8 ibm_ACH-000658 chr1       33608516  85736977       3854        0.889 Broad W…
 9 ibm_ACH-000658 chr1       85736978  86120112         54        1.22  Broad W…
10 ibm_ACH-000658 chr1       86120113 111627657       1569        0.895 Broad W…
# … with 1,266 more rows
In [114]:
%%R
embargoed <- "WES_embargoed.txt"
blacklisted <- "blacklist.txt"
In [149]:
%%R
library('rmutil')
read.list(file=blacklisted, skip=0, nlines=1, order=NULL)
2019-07-05 12:29:09::WARNING  R[write to console]: Registered S3 method overwritten by 'rmutil':
  method         from
  print.response httr

2019-07-05 12:29:09::WARNING  R[write to console]: 
Attaching package: ‘rmutil’


2019-07-05 12:29:09::WARNING  R[write to console]: The following object is masked from ‘package:AnnotationDbi’:

    as.data.frame


2019-07-05 12:29:09::WARNING  R[write to console]: The following object is masked from ‘package:IRanges’:

    as.data.frame


2019-07-05 12:29:09::WARNING  R[write to console]: The following object is masked from ‘package:S4Vectors’:

    as.data.frame


2019-07-05 12:29:09::WARNING  R[write to console]: The following object is masked from ‘package:Biobase’:

    description


2019-07-05 12:29:09::WARNING  R[write to console]: The following object is masked from ‘package:BiocGenerics’:

    as.data.frame


2019-07-05 12:29:09::WARNING  R[write to console]: The following object is masked from ‘package:stats4’:

    nobs


2019-07-05 12:29:09::WARNING  R[write to console]: The following object is masked from ‘package:tidyr’:

    nesting


2019-07-05 12:29:09::WARNING  R[write to console]: The following object is masked from ‘package:stats’:

    nobs


2019-07-05 12:29:09::WARNING  R[write to console]: The following objects are masked from ‘package:base’:

    as.data.frame, units


2019-07-05 12:29:09::WARNING  R[write to console]: Error in scan(file, skip = skip, nlines = nlines, quiet = TRUE) : 
  scan() attendait 'a real' et a reçu 'ACH-001434'
Calls: <Anonymous> ... <Anonymous> -> <Anonymous> -> withVisible -> read.list -> scan

Error in scan(file, skip = skip, nlines = nlines, quiet = TRUE) : 
  scan() attendait 'a real' et a reçu 'ACH-001434'
Calls: <Anonymous> ... <Anonymous> -> <Anonymous> -> withVisible -> read.list -> scan
In [116]:
%%R
head(corner(segments_unfiltered))
   DepMap_ID Chromosome    Start      End Num_Probes
1 ACH-000001          1        1  1969745        286
2 ACH-000001          1  1969746  6354345        365
3 ACH-000001          1  6354346  6958256        100
4 ACH-000001          1  6958257 15977206        884
5 ACH-000001          1 15977207 16174774         57
In [119]:
%%R
# TODO: IF seqnames (CHR) are 1-9 values, append "chr" in front of each! (bulk)
# ifelse(grepl("[0-9]+", new_copy_number$ChrChromosome), "chr"+, "no")
segments_blacklisted <- filterBlackListedLine(filepath=blacklisted,segments_unfiltered)
segments_embargoed <- filterBlackListedLine(filepath=embargoed,segments_unfiltered)
2019-07-05 12:11:30::WARNING  R[write to console]: Parsed with column specification:
cols(
  `ACH-001434` = col_character()
)

2019-07-05 12:11:30::WARNING  R[write to console]: Parsed with column specification:
cols(
  `ACH-001279` = col_character()
)

In [120]:
%%R
segments_unfiltered <- dplyr::rename(segments_unfiltered, seqnames=Chromosomes, start=Start, end=End)
2019-07-05 12:12:00::WARNING  R[write to console]: Error in .f(.x[[i]], ...) : objet 'Chromosomes' introuvable
Calls: <Anonymous> ... <Anonymous> -> vars_rename_eval -> map_if -> map -> .f

Error in .f(.x[[i]], ...) : objet 'Chromosomes' introuvable
Calls: <Anonymous> ... <Anonymous> -> vars_rename_eval -> map_if -> map -> .f
In [105]:
%%R
write.table(segments_unfiltered, file = paste0("temp/wes.",release,".segmented.cn"), sep = ',', quote = F, row.names = F)
In [127]:
%%R
dim(segments_unfiltered)
[1] 327663      7
In [ ]:
%%R
# What we upload to taiga
# TODO : change column name again
segments_blacklisted <- dplyr::rename(segments_blacklisted, seqnames=Chromosomes, start=Start, end=End)
segments_embargoed <- dplyr::rename(segments_embargoed, seqnames=Chromosomes, start=Start, end=End)
In [ ]:
%%R
write.table(segments_blacklisted, file = paste0('temp/wes.',release,'balcklisted.segmented.cn'), sep = ',', quote = F, row.names = T)
write.table(segments_embargoed, file = paste0('temp/wes.',release,'.embargoed.segmented.cn'), sep = ',', quote = F, row.names = T)
In [135]:
%%R
entrezgenes <- generateEntrezGenes()
In [142]:
%%R
head(corner(entrezgenes))
   EGID SYMBOL CHR    CHRLOC CHRLOCEND
1     1   A1BG  19  58346805  58353499
2    10   NAT2   8  18391281  18401215
3   100    ADA  20  44619518  44651758
4  1000   CDH2  18  27950962  28177130
5 10000   AKT3   1 243488232 243843584
In [165]:
%%R
source("CCLE_postp_function.R")
res <- generateGeneLevelMatrixFromSegments(entrezgenes, segments_unfiltered)
In [ ]:
%%R
res_embargoed <- generateGeneLevelMatrixFromSegments(entrezgenes, segments_embargoed)
res_balcklisted <- generateGeneLevelMatrixFromSegments(entrezgenes, segments_balcklisted)
In [166]:
%%R
genematrix_unfiltered <- res$gene_level_data_hg38
corner(genematrix_unfiltered)
           A1BG (1) NAT2 (10) ADA (100) CDH2 (1000) AKT3 (10000)
ACH-000001 1.265172 1.1415254 1.2958359   0.6627515     1.012791
ACH-000002 1.013633 0.9741737 1.0175121   1.4893275     1.012618
ACH-000003 1.031898 1.0958118 1.8768326   0.5364814     1.002498
ACH-000004 1.349290 1.0865089 0.7855092   1.0770610     1.086057
ACH-000005 1.222033 1.1622070 0.7612536   1.0724579     1.120937
In [ ]:
genematrix_embargoed <- res_embargoed$gene_level_data_hg38
corner(genematrix_embargoed)
genematrix_balcklisted <- res_balcklisted$gene_level_data_hg38
corner(genematrix_balcklisted)
In [167]:
%%R
write.table(genematrix_unfiltered, file = paste0('temp/wes.',release,'.gene.cn'), 
sep = ',', quote = F, row.names = T)
In [ ]:
write.table(genematrix_blacklisted, file = paste0('temp/wes.', release,'balcklisted.gene.cn'), 
      sep = ',', quote = F, row.names = T)
write.table(genematrix_embargoed, file = paste0('temp/wes.', release,'.embargoed.gene.cn'), 
      sep = ',', quote = F, row.names = T)

Validation step

In [24]:
genecn = pd.read_csv('temp/wes.'+release+'.gene.cn', sep = ',')
segmentcn = pd.read_csv('temp/cnv_ccle.called.seg', sep = '\t')
In [29]:
segmentcn
Out[29]:
Sample CONTIG START END NUM_POINTS_COPY_RATIO MEAN_LOG2_COPY_RATIO CALL
0 ibm_ACH-000658 chr1 785776 12777851 1710 -0.183785 -
1 ibm_ACH-000658 chr1 12792977 13053864 19 0.970490 +
2 ibm_ACH-000658 chr1 13115269 16448393 336 -0.149112 0
3 ibm_ACH-000658 chr1 16448842 16975982 65 0.662344 +
4 ibm_ACH-000658 chr1 16976250 17084089 50 -0.338767 -
5 ibm_ACH-000658 chr1 17086270 33326211 2124 -0.166508 0
6 ibm_ACH-000658 chr1 33328619 33606240 47 0.166090 +
7 ibm_ACH-000658 chr1 33610790 85736809 3854 -0.170009 0
8 ibm_ACH-000658 chr1 85737145 86115630 54 0.287622 +
9 ibm_ACH-000658 chr1 86124594 111564204 1569 -0.160833 0
10 ibm_ACH-000658 chr1 111691110 111739494 8 -1.558503 -
11 ibm_ACH-000658 chr1 111755451 121568451 713 -0.058538 0
12 ibm_ACH-000658 chr1 143880473 145784846 43 1.153831 +
13 ibm_ACH-000658 chr1 145788518 146020481 110 0.649050 +
14 ibm_ACH-000658 chr1 146069292 147162032 24 1.336794 +
15 ibm_ACH-000658 chr1 147162189 147993169 56 0.697550 +
16 ibm_ACH-000658 chr1 147995347 148149818 14 2.030036 +
17 ibm_ACH-000658 chr1 148482507 152315708 604 0.656568 +
18 ibm_ACH-000658 chr1 152350359 152412485 4 0.924756 +
19 ibm_ACH-000658 chr1 152511284 152885025 20 0.849826 +
20 ibm_ACH-000658 chr1 152909547 155033349 469 0.642168 +
21 ibm_ACH-000658 chr1 155033350 155034979 4 0.218625 +
22 ibm_ACH-000658 chr1 155040234 155235496 111 0.589880 +
23 ibm_ACH-000658 chr1 155235497 155237830 3 0.206812 +
24 ibm_ACH-000658 chr1 155237883 156311447 317 0.532224 +
25 ibm_ACH-000658 chr1 156311790 157544689 275 0.721402 +
26 ibm_ACH-000658 chr1 157544690 157698152 26 0.763880 +
27 ibm_ACH-000658 chr1 157698153 160176854 305 0.645724 +
28 ibm_ACH-000658 chr1 160177268 160186519 4 0.728144 +
29 ibm_ACH-000658 chr1 160186520 161368120 324 0.577146 +
... ... ... ... ... ... ... ...
1246 ibm_ACH-002446 chr19 43798204 45419449 330 -0.062992 0
1247 ibm_ACH-002446 chr19 45420073 45687979 69 -0.134149 0
1248 ibm_ACH-002446 chr19 45688136 48444189 487 -0.055875 0
1249 ibm_ACH-002446 chr19 48445755 48446592 2 0.526846 +
1250 ibm_ACH-002446 chr19 48446593 48758472 93 0.017552 0
1251 ibm_ACH-002446 chr19 48795175 49024230 108 -0.074870 0
1252 ibm_ACH-002446 chr19 49031845 49451189 155 -0.108942 0
1253 ibm_ACH-002446 chr19 49451234 49453673 2 -0.133832 0
1254 ibm_ACH-002446 chr19 49458235 51380384 574 -0.116783 0
1255 ibm_ACH-002446 chr19 51380385 51411623 5 -0.407410 -
1256 ibm_ACH-002446 chr19 51413461 51627790 44 0.029062 0
1257 ibm_ACH-002446 chr19 51643104 51693589 6 -1.829497 -
1258 ibm_ACH-002446 chr19 51713176 54240058 366 -0.076702 0
1259 ibm_ACH-002446 chr19 54240059 54242109 3 -1.852959 -
1260 ibm_ACH-002446 chr19 54250535 54280381 22 -0.214645 0
1261 ibm_ACH-002446 chr19 54280382 54587598 84 -0.110817 0
1262 ibm_ACH-002446 chr19 54593994 54601069 9 1.345228 +
1263 ibm_ACH-002446 chr19 54630716 54906429 91 0.115034 0
1264 ibm_ACH-002446 chr19 54906430 55028114 35 -0.355607 -
1265 ibm_ACH-002446 chr19 55031888 58572882 645 -0.033537 0
1266 ibm_ACH-002446 chr20 87459 14685211 933 -0.375277 -
1267 ibm_ACH-002446 chr20 15229689 26091794 545 -0.782074 -
1268 ibm_ACH-002446 chr20 26103222 48748132 1966 0.206010 +
1269 ibm_ACH-002446 chr20 48921639 64273852 1233 0.694498 +
1270 ibm_ACH-002446 chr21 9068165 10650084 30 -1.730240 -
1271 ibm_ACH-002446 chr21 13371245 46664624 1921 -0.413457 -
1272 ibm_ACH-002446 chr22 15698411 50799394 4022 -0.095532 0
1273 ibm_ACH-002446 chrX 2781815 49317540 2166 -0.906567 -
1274 ibm_ACH-002446 chrX 49322943 49323511 1 -29.450117 -
1275 ibm_ACH-002446 chrX 49345546 155545528 4264 -0.877244 -

1276 rows × 7 columns

In [31]:
prev = set(tc.get(name='segmented-cn-wes-prioritzed-7fe1', version=27, file='wes.19Q3interim.gene').index.values.tolist())
new1 = set(genecn.index.values.tolist())
new2 = set(segmentcn['Sample'].values.tolist())
print(len(prev), len(prev & new1), len(new1), len(new1 & new2))
1695 1695 1702 7
In [32]:
checkAmountOfSegments(segmentcn,thresh = 750)
In [33]:
checkGeneChangeAccrossAll(genecn, thresh=1.5)
Out[33]:
array([], dtype=object)
In [38]:
newsamples =  list(set(segmentcn["Sample"].tolist()))
In [30]:
segmentcn
Out[30]:
Sample CONTIG START END NUM_POINTS_COPY_RATIO MEAN_LOG2_COPY_RATIO CALL
0 ibm_ACH-001518 chr1 785776 16456145 2069 0.066190 0
1 ibm_ACH-001518 chr1 16458591 16975982 61 0.569518 +
2 ibm_ACH-001518 chr1 16976250 101022017 7024 0.025315 0
3 ibm_ACH-001518 chr1 101025058 149487635 1672 0.252054 +
4 ibm_ACH-001518 chr1 149487762 149488437 1 -3.705048 -
5 ibm_ACH-001518 chr1 149783752 248918615 8383 0.031559 0
6 ibm_ACH-001518 chr2 41357 47570583 2999 0.003066 0
7 ibm_ACH-001518 chr2 47678000 47783745 2 -19.944018 -
8 ibm_ACH-001518 chr2 47790676 86847025 2131 -0.013272 0
9 ibm_ACH-001518 chr2 86857806 87187492 3 -4.853835 -
10 ibm_ACH-001518 chr2 87338261 88861558 64 -0.115386 0
11 ibm_ACH-001518 chr2 88861725 90234982 54 0.579912 +
12 ibm_ACH-001518 chr2 91940935 178554334 4570 -0.001669 0
13 ibm_ACH-001518 chr2 178554335 178680581 152 0.458965 +
14 ibm_ACH-001518 chr2 178680829 178834690 139 -0.284919 -
15 ibm_ACH-001518 chr2 178836615 219515558 2806 0.001125 0
16 ibm_ACH-001518 chr2 219531507 221430403 89 0.696533 +
17 ibm_ACH-001518 chr2 221433891 242004734 1751 0.035405 0
18 ibm_ACH-001518 chr3 197398 198170102 11743 0.022799 0
19 ibm_ACH-001518 chr4 85481 165182806 6880 0.038329 0
20 ibm_ACH-001518 chr4 165207626 190082636 818 -0.974435 -
21 ibm_ACH-001518 chr5 140057 36976654 1244 0.070916 0
22 ibm_ACH-001518 chr5 36984425 37065144 38 -0.570981 -
23 ibm_ACH-001518 chr5 37107351 172968851 6503 0.025073 0
24 ibm_ACH-001518 chr5 172969162 175926991 72 -0.954518 -
25 ibm_ACH-001518 chr5 175967691 181261065 753 0.066945 0
26 ibm_ACH-001518 chr6 203183 32005939 2092 0.039428 0
27 ibm_ACH-001518 chr6 32005940 32098456 67 -0.511668 -
28 ibm_ACH-001518 chr6 32115488 170583999 7631 0.029312 0
29 ibm_ACH-001518 chr7 192950 142066081 7902 0.038898 0
... ... ... ... ... ... ... ...
2296 ibm_ACH-002069 chr19 50876221 51627790 167 0.333163 +
2297 ibm_ACH-002069 chr19 51643104 51645656 4 -29.707383 -
2298 ibm_ACH-002069 chr19 51692734 54219697 358 0.231685 +
2299 ibm_ACH-002069 chr19 54220373 54239341 9 1.368374 +
2300 ibm_ACH-002069 chr19 54239446 54455767 76 0.185947 +
2301 ibm_ACH-002069 chr19 54455768 54822110 106 0.477611 +
2302 ibm_ACH-002069 chr19 54824777 55181960 143 0.209391 +
2303 ibm_ACH-002069 chr19 55181961 58572882 565 0.317124 +
2304 ibm_ACH-002069 chr20 87459 64273852 4677 0.257016 +
2305 ibm_ACH-002069 chr21 9068165 10542698 9 -0.820465 -
2306 ibm_ACH-002069 chr21 10543076 14643383 65 0.120287 0
2307 ibm_ACH-002069 chr21 14658498 25706340 123 0.810637 +
2308 ibm_ACH-002069 chr21 25709183 25709701 1 -2.197575 -
2309 ibm_ACH-002069 chr21 25711971 29065357 126 0.880122 +
2310 ibm_ACH-002069 chr21 29066465 43797591 1106 0.465867 +
2311 ibm_ACH-002069 chr21 43797592 46664624 521 -0.123504 0
2312 ibm_ACH-002069 chr22 15698411 22646638 723 0.110341 0
2313 ibm_ACH-002069 chr22 22646639 22901545 33 0.616361 +
2314 ibm_ACH-002069 chr22 22906092 38962354 1814 0.067907 0
2315 ibm_ACH-002069 chr22 38964362 38992738 8 -22.977457 -
2316 ibm_ACH-002069 chr22 39014112 42128567 504 -0.001952 0
2317 ibm_ACH-002069 chr22 42128568 42141260 5 0.413569 +
2318 ibm_ACH-002069 chr22 42141261 50507942 823 0.075072 0
2319 ibm_ACH-002069 chr22 50515471 50799394 112 -1.193511 -
2320 ibm_ACH-002069 chrX 2781815 8170446 112 -0.926466 -
2321 ibm_ACH-002069 chrX 8465649 8533305 2 -3.840768 -
2322 ibm_ACH-002069 chrX 8534068 14920061 280 -1.034343 -
2323 ibm_ACH-002069 chrX 15244255 79172261 3074 -0.027348 0
2324 ibm_ACH-002069 chrX 79360838 85092813 114 -0.580956 -
2325 ibm_ACH-002069 chrX 85092935 155545528 2849 -0.971653 -

2326 rows × 7 columns

In [5]:
newsamples= refwm.get_sample_sets().loc[sample_set_id].samples
In [6]:
samples = refwm.get_samples()
plots = samples.loc[samples.index.isin(newsamples)]["modeled_segments_plot_tumor"].values
for plot in plots:
    ! gsutil cp $plot temp/
In [10]:
for plot in plots:
    display(Image('temp/'+plot.split('/')[-1]))

Upload to taiga

In [180]:
tc.update_dataset(dataset_permaname="segmented-cn-wes-prioritzed-7fe1", 
                  upload_file_path_dict={'temp/wes.'+sample_set_id+'.gene.cn': 'NumericMatrixCSV',
                                        'temp/wes.'+sample_set_id+'.segmented.cn': 'TableCSV'},
                dataset_description="updating to "+sample_set_id,
                force_remove=True
                 )
Uploading wes.19Q3.gene...
----------------------------------------------------------
KeyboardInterrupt        Traceback (most recent call last)
<ipython-input-180-343c29b9514b> in <module>
      3                                         'temp/wes.'+sample_set_id+'.segmented.cn': 'TableCSV'},
      4                 dataset_description="updating to "+sample_set_id,
----> 5                 force_remove=True
      6                  )

/anaconda3/envs/py36/lib/python3.6/site-packages/taigapy/__init__.py in update_dataset(self, dataset_id, dataset_permaname, dataset_version, dataset_description, upload_file_path_dict, force_keep, force_remove)
    659             keep_datafile_id_list = []
    660 
--> 661         new_session_id = self.upload_session_files(upload_file_path_dict=upload_file_path_dict)
    662 
    663         new_dataset_version_params = dict()

/anaconda3/envs/py36/lib/python3.6/site-packages/taigapy/__init__.py in upload_session_files(self, upload_file_path_dict)
    499 
    500             s3_client.upload_file(upload_file_path, bucket,
--> 501                                   upload_file_object.prefix_and_file_name)
    502 
    503             S3UploadedData = s3_client.get_object(Bucket=bucket, Key=upload_file_object.prefix_and_file_name)

/anaconda3/envs/py36/lib/python3.6/site-packages/boto3/s3/inject.py in upload_file(self, Filename, Bucket, Key, ExtraArgs, Callback, Config)
    129         return transfer.upload_file(
    130             filename=Filename, bucket=Bucket, key=Key,
--> 131             extra_args=ExtraArgs, callback=Callback)
    132 
    133 

/anaconda3/envs/py36/lib/python3.6/site-packages/boto3/s3/transfer.py in upload_file(self, filename, bucket, key, callback, extra_args)
    277             filename, bucket, key, extra_args, subscribers)
    278         try:
--> 279             future.result()
    280         # If a client error was raised, add the backwards compatibility layer
    281         # that raises a S3UploadFailedError. These specific errors were only

/anaconda3/envs/py36/lib/python3.6/site-packages/s3transfer/futures.py in result(self)
    107         except KeyboardInterrupt as e:
    108             self.cancel()
--> 109             raise e
    110 
    111     def cancel(self):

/anaconda3/envs/py36/lib/python3.6/site-packages/s3transfer/futures.py in result(self)
    104             # however if a KeyboardInterrupt is raised we want want to exit
    105             # out of this and propogate the exception.
--> 106             return self._coordinator.result()
    107         except KeyboardInterrupt as e:
    108             self.cancel()

/anaconda3/envs/py36/lib/python3.6/site-packages/s3transfer/futures.py in result(self)
    258         # possible value integer value, which is on the scale of billions of
    259         # years...
--> 260         self._done_event.wait(MAXINT)
    261 
    262         # Once done waiting, raise an exception if present or return the

/anaconda3/envs/py36/lib/python3.6/threading.py in wait(self, timeout)
    549             signaled = self._flag
    550             if not signaled:
--> 551                 signaled = self._cond.wait(timeout)
    552             return signaled
    553 

/anaconda3/envs/py36/lib/python3.6/threading.py in wait(self, timeout)
    293         try:    # restore state no matter what (e.g., KeyboardInterrupt)
    294             if timeout is None:
--> 295                 waiter.acquire()
    296                 gotit = True
    297             else:

KeyboardInterrupt: 
In [ ]: